#Load packages
source("package_load_install.R")
#Coulour pallettes
# col_fil <- pal_jco("default")(10)
# col_scale <- scale_color_jco()
col_fil <- brewer.pal(10, "Dark2")#pal_jco("default")(10)
col_scale <- scale_fill_distiller(palette = "Dark2")
theme_set(theme_classic(base_family = "sans")) #sans = TT Arial, mono = TT courier new, serif = TT Times new roman
source("utils.R")
#Load phyloseq objects
ps.cur <- readRDS("ps.2020_05_AMS.2020-03-31.curated.RDS")
ps.cur## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2063 taxa and 180 samples ]
## sample_data() Sample Data: [ 180 samples by 111 sample variables ]
## tax_table() Taxonomy Table: [ 2063 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2063 tips and 2061 internal nodes ]
ps<-ps.cur
asv_label <- function(ps, species=T, collapse_species=2){
if (species==T){ps@tax_table[,"Species"][lapply(strsplit(ps@tax_table[,7],"/"), length)>collapse_species] <- "Ambigous"}
label=taxa_names(ps)
label[!is.na(ps@tax_table[,1])] <- paste0("k_",ps@tax_table[,1])[!is.na(ps@tax_table[,1])]
label[!is.na(ps@tax_table[,2])] <- paste0("p_",ps@tax_table[,2])[!is.na(ps@tax_table[,2])]
label[!is.na(ps@tax_table[,3])] <- paste0("c_",ps@tax_table[,3])[!is.na(ps@tax_table[,3])]
label[!is.na(ps@tax_table[,4])] <- paste0("o_",ps@tax_table[,4])[!is.na(ps@tax_table[,4])]
label[!is.na(ps@tax_table[,5])] <- paste0("f_",ps@tax_table[,5])[!is.na(ps@tax_table[,5])]
label[!is.na(ps@tax_table[,6])] <- paste0("g_",ps@tax_table[,6])[!is.na(ps@tax_table[,6])]
if (species==T){
label[!is.na(ps@tax_table[,7])] <- paste0("s_",ps@tax_table[,6],"_", ps@tax_table[,7])[!is.na(ps@tax_table[,7])]}
label <- make.unique(label)
return(label)
}
taxa_names(ps.cur) <- asv_label(ps.cur)
#Rename ambigous species names
rename_entries <- function(x) {
if_else(str_count(x, "/") > 1, "Ambigous", x)
}
taxa.names <- taxa_names(ps.cur)
tax.nonamb <- ps.cur@tax_table %>% as.data.frame() %>% mutate(across(Species, rename_entries))
row.names(tax.nonamb) <- taxa.names
head(tax.nonamb)tax_table(ps.cur) <- as(tax.nonamb,"matrix")
#Use curated, ASV relabbeled OTU table for analysis
PSB <- ps.curOnly run for original non-randomized metadata
meta.microbiome <- readxl::read_xlsx("1. Metadata - Microbiome Excel characteristics.xlsx")
meta.24hfood <- readxl::read_xlsx("3. Metadata - Microbiome Excel 24h food intake.xlsx")
meta.cortisol <- readxl::read_xlsx("Cortisol_variabelen_microbiome.xlsx")
#meta.ffqfood <- readxl::read_xlsx("4. Metadata - Microbiome Excel FFQ food intake.xlsx")
meta.outcomes <- readxl::read_xlsx("Infant outcomes.xlsx") %>% dplyr::rename(AMS_ID = Subject_ID)
meta.dat <- data.frame(sample_data(PSB))
meta.dat$ID <- meta.dat$Ext_ID
meta <- dplyr::left_join(meta.dat,meta.microbiome,by="ID")
meta <- dplyr::left_join(meta,meta.cortisol,by="ID")
meta <- dplyr::left_join(meta,meta.24hfood,by="ID")
meta <- meta %>% tidyr::separate_wider_regex(Int_ID, c(AMS_ID = ".*", "_", Timecode = ".*"),cols_remove=FALSE)#Split internal id to separate id and timepint code
meta <- dplyr::left_join(meta,meta.outcomes,by="AMS_ID",keep=FALSE) #Do not keep non-matched IDs as these are for excluded infants
#meta <- dplyr::left_join(meta,meta.ffqfood,by="ID")
meta <- type.convert(meta) %>% as.data.frame()
rownames(meta) <- rownames(meta.dat)
sample_data(PSB) <- meta
saveRDS(PSB,"AMS_phyloseq.RDS")All metadata columns:
## [1] "ALA_g" "Alcohol_.g."
## [3] "Alcohol_totaal_en%" "Alcohol_totaal_g"
## [5] "AMP_Plate_ID" "AMS_ID"
## [7] "Antibio_Name" "Antibio_Per"
## [9] "Antibio_Use.x" "Antibio_Use.y"
## [11] "Available" "Bio_Safety_Level"
## [13] "Birth_weight_child" "BMI_mother"
## [15] "Bristol_Scale" "Calcium_.mg."
## [17] "Calcium_mg" "Cholesterol_mg"
## [19] "Collection_Date" "Collection_Time"
## [21] "Contact_AMC" "Contact_Person_Email"
## [23] "Date_add_Meta" "Datum_moment1"
## [25] "Datum_moment2" "Datum_moment3"
## [27] "Department" "DHA_g"
## [29] "Diet_Journal" "Education_mother"
## [31] "Eiwit_.g." "Eiwit_dierlijk_g"
## [33] "Eiwit_plantaardig_g" "Eiwit_totaal_en%"
## [35] "Eiwit_totaal_g" "Energie_.kcal."
## [37] "Energie_kcal" "Energie_kjoule"
## [39] "EPA_g" "EPDS_stress_score"
## [41] "Ethnicity_mother" "Ext_ID"
## [43] "Foliumzuur_.ug." "Follow_ID"
## [45] "Folaat_equivalenten_ug" "Folaat_ug"
## [47] "Foodscore_g" "Gender_child"
## [49] "Geo_Loc_Name" "Hair cortisol"
## [51] "Health_mother" "HM_cortisol_AUC_1"
## [53] "HM_cortisol_AUC_2" "HM_cortisol_morning_peak_1"
## [55] "HM_cortisol_morning_peak_2" "Hospitalized_child"
## [57] "Host_Age_Years" "Host_body_Mass_Index"
## [59] "Host_Body_Product" "Host_Body_Site"
## [61] "Host_Common_Name" "Host_Diet"
## [63] "Host_Health" "Host_Height"
## [65] "Host_Sex" "Host_Taxid"
## [67] "Host_Tot_Mass" "ID"
## [69] "Ijzer_.mg." "IJzer_haem_mg"
## [71] "IJzer_non_haem_mg" "IJzer_totaal_mg"
## [73] "Index_Name" "Infant_headc_3m"
## [75] "Infant_headc_w2" "Infant_headc_w4"
## [77] "Infant_headc_w8" "Infant_lengt_3M"
## [79] "Infant_lengt_w2" "Infant_lengt_w4"
## [81] "Infant_lengt_w8" "Infant_sleep_>6"
## [83] "Infant_sleep_day" "Infant_sleep_naps"
## [85] "Infant_sleep_night" "Infant_sleep_wake"
## [87] "Infant_temp_act" "Infant_temp_app"
## [89] "Infant_temp_cudd" "Infant_temp_dist"
## [91] "Infant_temp_dura" "Infant_temp_fall"
## [93] "Infant_temp_fear" "Infant_temp_hip"
## [95] "Infant_temp_lip" "Infant_temp_NEG"
## [97] "Infant_temp_perc" "Infant_temp_REG"
## [99] "Infant_temp_sad" "Infant_temp_smile"
## [101] "Infant_temp_soot" "Infant_temp_SUR"
## [103] "Infant_temp_voc" "Infant_weight_3m"
## [105] "Infant_weight_w2" "Infant_weight_w4"
## [107] "Infant_weight_w8" "Input"
## [109] "Int_ID" "Jodium_.ug."
## [111] "JTVEA_stress_score" "JTVEN_stress_score"
## [113] "JTVPA_stress_score" "JTVPN_stress_score"
## [115] "JTVSA_stress_score" "Kalium_.mg."
## [117] "Koolhydr_.g." "Koolhydraten_totaal_en%"
## [119] "Koolhydraten_totaal_g" "Lib_Conc_final"
## [121] "Lib_Conc_sub" "Lib_Const_Meth"
## [123] "Lib_Pool" "Lib_Ratio"
## [125] "Lib_Size" "Library_ID"
## [127] "Linolzuur_g" "LSCr_stress_score"
## [129] "Magnesium_.mg." "Magnesium_mg"
## [131] "Maternal_age" "Methionine_mg"
## [133] "Mid_F" "Mid_R"
## [135] "Modus_partus" "Mono_en_disacchariden_totaal_g"
## [137] "Natrium_.mg." "Nicotinezuur_.mg."
## [139] "Nicotinezuur_mg" "Nucl_Acid_260230"
## [141] "Nucl_Acid_260280" "Nucl_Acid_Amp"
## [143] "Nucl_Acid_AMP_Conc" "Nucl_Acid_Amp_ID"
## [145] "Nucl_Acid_AMP_Used" "Nucl_Acid_Beat_ID"
## [147] "Nucl_Acid_Buffer_Date" "Nucl_Acid_Conc"
## [149] "Nucl_Acid_Ext" "Nucl_Acid_Ext_ID"
## [151] "Nucl_Acid_Iso_Date" "PCR_Cond"
## [153] "PCR_Primers" "Polysacchariden_totaal_g"
## [155] "Primary_Test_Var" "Project_name"
## [157] "PSS_stress_score" "reads_Chloroplast"
## [159] "reads_Contaminants" "reads_mapped"
## [161] "reads_merged" "reads_Mitochondria"
## [163] "reads_pf" "reads_phix"
## [165] "reads_total" "Remarks"
## [167] "Retinol_activiteit_equiv.RAE_ug" "Saliva_cortisol_morning_peak"
## [169] "Samp_Collect_Device" "Samp_Mat_Process"
## [171] "Samp_Size" "Samp_Store_Date_AMC"
## [173] "Samp_Store_Loc_AMC" "Samp_Store_Temp_AMC"
## [175] "Samp_Store_Temp_Local" "Samp_Vol_We_DNA_Ext"
## [177] "Season_milk_collection" "Selenium_.ug."
## [179] "Seq_ID" "Seq_Meth"
## [181] "SOP" "STAIs_stress_score"
## [183] "STAIt_stress_score" "Study_group"
## [185] "Subject_ID" "Target_Gene"
## [187] "Target_Subfragment" "Time_point"
## [189] "Time_Point" "Timecode"
## [191] "timepoint" "Timepoint"
## [193] "Verz_vet_.g." "Vet_.g."
## [195] "Vet_totaal_en%" "Vet_totaal_g"
## [197] "Vetzuren_enkelv_onverz_g" "Vetzuren_meerv_onverz_g"
## [199] "Vetzuren_n-3_meerv_onverz_cis_g" "Vetzuren_n-6_meerv_onverz__cis_g"
## [201] "Vetzuren_totaal_trans_g" "Vetzuren_totaal_verzadigd_g"
## [203] "Vezels_.g." "Vit_A_.ug."
## [205] "Vit_B1_.mg." "Vit_B12_.ug."
## [207] "Vit_B2_.mg." "Vit_B6_.mg."
## [209] "Vit_C_.mg." "Vit_D_.ug."
## [211] "Vit_E_.mg." "Vitamine_B1_mg"
## [213] "Vitamine_B12_ug" "Vitamine_B2_mg"
## [215] "Vitamine_B6_totaal_mg" "Vitamine_C_mg"
## [217] "Vitamine_D_totaal_ug" "Vitamine_E_totaal_mg"
## [219] "Voedingsvezel_totaal_en%" "Voedingsvezel_totaal_g"
## [221] "Water_.g." "Your_Name"
## [223] "Zink_.mg." "Zink_mg"
## [225] "Zout_.g." "Aantal_vm"
meta.dat$`Weight month 1` <- as.numeric(meta.dat$`Weight month 1`)
meta.dat$`Weight month 6` <- as.numeric(meta.dat$`Weight month 6`)
meta.dat$`Height month 1` <- as.numeric(meta.dat$`Height month 1`)
meta.dat$`Height month 6` <- as.numeric(meta.dat$`Height month 6`)
meta.dat$`30days` <- 30
meta.dat$`180days` <- 180
svy <- addWGSR(data = anthro3, sex = "sex", firstPart = "weight",
secondPart = "height", index = "wfh")
# Weight for height
meta.dat <- addWGSR(data = meta.dat, sex = "Infant sex", firstPart = "Weight month 1",
secondPart = "Height month 1", index = "wfl", output = "whz.1m")
meta.dat <- addWGSR(data = meta.dat, sex = "Infant sex", firstPart = "Weight month 6",
secondPart = "Height month 6", index = "wfl", output = "whz.6m")
meta.dat$delta.whz <- meta.dat$whz.6m - meta.dat$whz.1m
# Height for age
meta.dat <- addWGSR(data = meta.dat, sex = "Infant sex", firstPart = "Height month 1",
secondPart = 'Sample_age(1MO)', index = "lfa", output = "haz.1m")
meta.dat <- addWGSR(data = meta.dat, sex = "Infant sex", firstPart = "Height month 6",
secondPart = '180days', index = "lfa", output = "haz.6m")
meta.dat$delta.haz <- meta.dat$haz.6m - meta.dat$haz.1m
#Weight for age
meta.dat <- addWGSR(data = meta.dat, sex = "Infant sex", firstPart = "Weight month 1",
secondPart = 'Sample_age(1MO)', index = "lfa", output = "waz.1m")
meta.dat <- addWGSR(data = meta.dat, sex = "Infant sex", firstPart = "Weight month 6",
secondPart = '180days', index = "lfa", output = "waz.6m")
meta.dat$delta.waz <- meta.dat$waz.6m - meta.dat$waz.1m#colnames(meta.outcomes)
key.vars <- c("Stress_group",
"Sex_child",
"PSS_stress_score",
"LSCr_stress_score",
"Education_mother",
"BMI_mother",
"Timepoint"
# "Hair cortisol",
# "HM_cortisol_AUC_1",
#"Saliva_cortisol_morning_peak"
)
key.vars.inf <- c("Infant_temp_NEG",
"Infant_temp_REG",
"Infant_temp_SUR",
"Infant_weight_w2",
"Infant_weight_3m",
"Timepoint"
)
key.vars.cortisol <- c("Hair cortisol",
"HM_cortisol_AUC_1",
"HM_cortisol_AUC_2",
# "HM_cortisol_morning_peak_1",
# "HM_cortisol_morning_peak_2",
"Saliva_cortisol_morning_peak",
"Timepoint"
)Remove taxa not seen found in at least 3 samples with a total count of minimum 1000 reads. This protects against an OTU with small mean & trivially large C.V.
prev <- 3/nrow(PSB@sam_data)
PSB.fil = metagMisc::phyloseq_filter_prevalence(PSB, prev.trh = prev, abund.trh = 500, abund.type = "total", threshold_condition = "AND")
spec <- specnumber(as.matrix(otu_table(PSB.fil))) %>% sort(decreasing=FALSE)
head(spec)## 2020_0002_00_SA506SA703 2020_0002_00_SB503SA702 2020_0002_00_SA507SA704
## 16 18 25
## 2020_0002_00_SA505SA703 2020_0002_00_SB501SA701 2020_0002_00_SB505SA703
## 25 28 28
## [1] 0.9753507
97.5% of reads were kept after filtering, but number of taxa reduced from 2063 to 250
meta <- PSB@sam_data %>% as.data.frame()
save(list=c("PSB","PSB.CSS","meta","key.vars","key.vars.inf","key.vars.cortisol"),
file = "Curated_AMS.RData"
)Read count per sample
## 2020_0002_00_SA501SA701 2020_0002_00_SA503SA703 2020_0002_00_SA505SA705
## 12170 3138 33535
## 2020_0002_00_SA507SA707 2020_0002_00_SA501SA702 2020_0002_00_SA503SA704
## 7083 47267 11266
## 2020_0002_00_SA505SA706 2020_0002_00_SA507SA708 2020_0002_00_SA501SA703
## 48087 68730 11609
## 2020_0002_00_SA503SA705 2020_0002_00_SA505SA707 2020_0002_00_SA507SA709
## 59026 15570 61885
## 2020_0002_00_SA501SA704 2020_0002_00_SA503SA706 2020_0002_00_SA506SA709
## 9701 30514 2392
## 2020_0002_00_SA508SA711 2020_0002_00_SA502SA706 2020_0002_00_SA504SA708
## 42542 28729 31251
## 2020_0002_00_SA506SA710 2020_0002_00_SA508SA712 2020_0002_00_SA502SA707
## 5699 48619 49937
## 2020_0002_00_SA504SA709 2020_0002_00_SA506SA711 2020_0002_00_SA508SA701
## 1395 24927 42264
## 2020_0002_00_SA502SA708 2020_0002_00_SA504SA710 2020_0002_00_SA506SA712
## 40988 29354 7819
## 2020_0002_00_SA508SA702 2020_0002_00_SA501SA708 2020_0002_00_SA503SA710
## 30524 21012 48867
## 2020_0002_00_SA505SA712 2020_0002_00_SA507SA702 2020_0002_00_SA501SA709
## 6856 31086 55488
## 2020_0002_00_SA503SA711 2020_0002_00_SA506SA702 2020_0002_00_SA508SA704
## 8145 8709 22643
## 2020_0002_00_SA502SA711 2020_0002_00_SA504SA701 2020_0002_00_SA506SA703
## 10307 9782 128
## 2020_0002_00_SA508SA705 2020_0002_00_SA502SA712 2020_0002_00_SA504SA702
## 30130 13361 771
## 2020_0002_00_SA506SA704 2020_0002_00_SA508SA706 2020_0002_00_SA501SA712
## 5410 12356 1358
## 2020_0002_00_SA503SA702 2020_0002_00_SB501SA701 2020_0002_00_SB503SA703
## 44124 692 9309
## 2020_0002_00_SB505SA705 2020_0002_00_SB507SA707 2020_0002_00_SB501SA702
## 2080 18798 54142
## 2020_0002_00_SB503SA704 2020_0002_00_SB505SA706 2020_0002_00_SB507SA708
## 13047 14933 21337
## 2020_0002_00_SB501SA703 2020_0002_00_SB503SA705 2020_0002_00_SB505SA707
## 10725 4224 50136
## 2020_0002_00_SB507SA709 2020_0002_00_SB501SA704 2020_0002_00_SB503SA706
## 40828 52600 2550
## 2020_0002_00_SB505SA708 2020_0002_00_SB507SA710 2020_0002_00_SB501SA705
## 3664 30507 5999
## 2020_0002_00_SB502SA706 2020_0002_00_SB504SA708 2020_0002_00_SB506SA710
## 10435 3817 8431
## 2020_0002_00_SB508SA712 2020_0002_00_SB502SA707 2020_0002_00_SB504SA709
## 52132 50619 74594
## 2020_0002_00_SB506SA711 2020_0002_00_SB508SA701 2020_0002_00_SB502SA708
## 55369 7339 11666
## 2020_0002_00_SB505SA711 2020_0002_00_SB507SA701 2020_0002_00_SB501SA708
## 25040 6529 43807
## 2020_0002_00_SB503SA710 2020_0002_00_SB505SA712 2020_0002_00_SB507SA702
## 19514 11295 26850
## 2020_0002_00_SB501SA709 2020_0002_00_SB503SA711 2020_0002_00_SB505SA701
## 9058 4869 41397
## 2020_0002_00_SB507SA703 2020_0002_00_SB501SA710 2020_0002_00_SB503SA712
## 2612 47965 26051
## 2020_0002_00_SB505SA702 2020_0002_00_SB507SA704 2020_0002_00_SB508SA705
## 37514 16116 6926
## 2020_0002_00_SB502SA712 2020_0002_00_SB504SA702 2020_0002_00_SB506SA704
## 49883 9721 70143
## 2020_0002_00_SB508SA706 2020_0002_00_SB502SA701 2020_0002_00_SA502SA702
## 35261 10753 8892
## 2020_0002_00_SA504SA704 2020_0002_00_SA506SA706 2020_0002_00_SA508SA708
## 6553 6899 583
## 2020_0002_00_SA502SA703 2020_0002_00_SA504SA705 2020_0002_00_SA506SA707
## 32340 9470 3065
## 2020_0002_00_SA508SA709 2020_0002_00_SA502SA704 2020_0002_00_SA504SA706
## 35659 41367 1226
## 2020_0002_00_SA506SA708 2020_0002_00_SA508SA710 2020_0002_00_SA502SA705
## 3670 35748 42246
## 2020_0002_00_SA504SA707 2020_0002_00_SA507SA710 2020_0002_00_SA501SA705
## 48837 46943 13095
## 2020_0002_00_SA503SA707 2020_0002_00_SA505SA709 2020_0002_00_SA507SA711
## 8567 95344 3124
## 2020_0002_00_SA501SA706 2020_0002_00_SA503SA708 2020_0002_00_SA505SA710
## 39168 4801 3880
## 2020_0002_00_SA507SA712 2020_0002_00_SA501SA707 2020_0002_00_SA503SA709
## 55561 45894 45512
## 2020_0002_00_SA505SA711 2020_0002_00_SA507SA701 2020_0002_00_SA502SA709
## 7868 48035 16526
## 2020_0002_00_SA504SA711 2020_0002_00_SA506SA701 2020_0002_00_SA508SA703
## 8544 9087 85395
## 2020_0002_00_SA502SA710 2020_0002_00_SA505SA701 2020_0002_00_SA507SA703
## 55120 4297 6078
## 2020_0002_00_SA501SA710 2020_0002_00_SA503SA712 2020_0002_00_SA505SA702
## 60214 6021 22725
## 2020_0002_00_SA507SA704 2020_0002_00_SA501SA711 2020_0002_00_SA503SA701
## 319 6439 15568
## 2020_0002_00_SA505SA703 2020_0002_00_SA507SA705 2020_0002_00_SA502SA701
## 1336 13544 1610
## 2020_0002_00_SA504SA703 2020_0002_00_SB502SA702 2020_0002_00_SB504SA704
## 50243 1844 27496
## 2020_0002_00_SB506SA706 2020_0002_00_SB508SA708 2020_0002_00_SB502SA703
## 12405 2212 35592
## 2020_0002_00_SB504SA705 2020_0002_00_SB506SA707 2020_0002_00_SB508SA709
## 40231 55148 56956
## 2020_0002_00_SB502SA704 2020_0002_00_SB504SA706 2020_0002_00_SB506SA708
## 8227 7126 12352
## 2020_0002_00_SB508SA710 2020_0002_00_SB502SA705 2020_0002_00_SB504SA707
## 29074 41753 44304
## 2020_0002_00_SB506SA709 2020_0002_00_SB508SA711 2020_0002_00_SB503SA707
## 3141 1099 11251
## 2020_0002_00_SB505SA709 2020_0002_00_SB507SA711 2020_0002_00_SB501SA706
## 6979 36706 38258
## 2020_0002_00_SB503SA708 2020_0002_00_SB505SA710 2020_0002_00_SB507SA712
## 44937 35155 46866
## 2020_0002_00_SB501SA707 2020_0002_00_SB503SA709 2020_0002_00_SB506SA712
## 20985 2817 49570
## 2020_0002_00_SB508SA702 2020_0002_00_SB502SA709 2020_0002_00_SB504SA711
## 64960 34917 21633
## 2020_0002_00_SB506SA701 2020_0002_00_SB508SA703 2020_0002_00_SB502SA710
## 9238 17224 33517
## 2020_0002_00_SB504SA712 2020_0002_00_SB506SA702 2020_0002_00_SB508SA704
## 3146 51669 2157
## 2020_0002_00_SB502SA711 2020_0002_00_SB504SA701 2020_0002_00_SB506SA703
## 38319 44373 2536
## 2020_0002_00_SB501SA711 2020_0002_00_SB503SA701 2020_0002_00_SB505SA703
## 4510 64187 2007
## 2020_0002_00_SB507SA705 2020_0002_00_SB501SA712 2020_0002_00_SB503SA702
## 46335 41297 508
Samples look quite similar in compostion, apart from sample 37281 and 37291. 377281 had very low observed ASV count, so it is removed.
### Phylum
#remotes::install_github("jstokholm/rabuplot")
library(rabuplot)
rabu.group <- rabuplot(
PSB,
violin = TRUE, predictor = "Stress_group", p_adjust = TRUE, p_stars = TRUE ,colors = col_fil[1:2],text_angle_x = 45)
rabu.group <- rabu.group +
guides(fill=guide_legend("Stress group"))
rabu.grouprabu.group.adj_time <- rabuplot(
PSB,
violin = TRUE, predictor = "Stress_group",
Time = "Timecode",
p_adjust = TRUE, p_stars = TRUE ,colors = col_fil[1:2], stats = "non-parametric")
rabu.group.adj_timeAnova
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ variable, data = rich)
##
## $variable
## diff lwr upr p adj
## Stress-Control -0.1118888 -0.3039283 0.08015081 0.2517858
Anova
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ variable, data = rich)
##
## $variable
## diff lwr upr p adj
## p24-p10 -0.01552703 -0.18248 0.151426 0.8545917
Permanova statistics
Permanova statistics
#Permanova
Permanova - time points separately
ps <- PSB
ps <- tax_glom(ps, "Species", NArm = FALSE) #select a level to compare
library(DESeq2)
# remove all error taxa
ps.ds <- phyloseq_to_deseq2(ps, ~Stress_group + Timepoint)
# solve rows without a zero, deseq need to calculate the geometric zero,
cts <- counts(ps.ds)
geoMeans <- apply(cts, 1, function(row) if (all(row == 0)) 0 else exp(mean(log(row[row != 0]))))
dds <- estimateSizeFactors(ps.ds, geoMeans=geoMeans)
ps.ds <- DESeq2::DESeq(dds, test="Wald", fitType="parametric")
# result
res = results(ps.ds, cooksCutoff = FALSE)
sigtab = res
sigtab = cbind(as(sigtab, "data.frame"), as(tax_table(ps)[rownames(sigtab), ], "matrix"))
head(sigtab)EnhancedVolcano::EnhancedVolcano(sigtab,
lab = sub("^[^_]*_", "", rownames(res)),
x = 'log2FoldChange',
y = 'pvalue',
pCutoff = 0.05,
FCcutoff = 0.5)# Select significant ASVs
tab <- subset(sigtab, padj < 0.05)
OTU <- unique(tab)
##
ps.rel <- transform_sample_counts(PSB, function(x) x/sum(x)*100)
ps.rel.sig <- prune_taxa(colnames(otu_table(ps.rel)) %in% rownames(OTU) , ps.rel)
## at least 1% relative abundance appearance in 1% samples
mat <- as.matrix(otu_table(ps.rel.sig))
species2keep <- colnames(mat)[rowSums(mat>=2)/length(colnames(mat))> 0.1]
species2keep <- species2keep[!is.na(species2keep)]
sigtab.p.prev <- tab[species2keep,]
sigtabgen = subset(sigtab.p.prev, !is.na(Genus))
# Phylum order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Phylum, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Phylum = factor(as.character(sigtabgen$Phylum), levels=names(x))
# Genus order
x = tapply(sigtabgen$log2FoldChange, sigtabgen$Genus, function(x) max(x))
x = sort(x, TRUE)
sigtabgen$Genus = factor(as.character(sigtabgen$Genus), levels=names(x))
ggplot(sigtabgen, aes(y=Genus, x=log2FoldChange, color=Phylum)) +
geom_vline(xintercept = 0.0, color = "gray", size = 0.5) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5)) +
scale_color_manual(values = col_fil[3:4])deseq2.log2fold.box <- ggplot(sigtabgen, aes(y=Genus, x=log2FoldChange, color=Family)) +
geom_vline(xintercept = 0.0, color = "gray", size = 0.5) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust=0.5),
#plot.margin = margin(2, 0, 0, 0, "cm")
) +
scale_color_manual(values = col_fil) +
ylab("ASV genus") +
ggtitle("\U2190 Controls HS \U2192")
deseq2.log2fold.box theme_set(theme_bw())
scale_fill_discrete <- function(palname = "Set1", ...) {
scale_fill_brewer(palette = palname, ...)
}
tab <- subset(sigtab, padj < 0.05)
OTU <- unique(tab)
ps.rel <- transform_sample_counts(ps, function(x) x/sum(x)*100)
ps.rel.sig <- prune_taxa(colnames(otu_table(ps.rel)) %in% rownames(OTU) , ps.rel)
#select the rel-abun > 0.1%
# at least 1% relative abundance appearance in 5% samples
mat <- as.matrix(otu_table(ps.rel.sig))
species2keep <- colnames(mat)[rowSums(mat>=1)/length(colnames(mat))> 0.1]
species2keep## [1] "s_Enterobacter_Ambigous.3" "s_Klebsiella_Ambigous.3"
## [3] "s_Acinetobacter_Ambigous.3" "g_Acinetobacter.40"
## [5] "s_Stenotrophomonas_maltophilia.9" "g_Veillonella.18"
## [7] "s_Gemella_Ambigous" "s_Streptococcus_Ambigous.5"
## [9] "s_Lactobacillus_Ambigous.2" "s_Klebsiella_michiganensis/oxytoca"
## [11] NA NA
## [13] NA NA
## [15] NA NA
## [17] NA NA
## [19] NA NA
## [21] NA NA
## [23] NA NA
## [25] NA NA
## [27] NA NA
## [29] NA NA
## [31] NA NA
## [33] NA NA
## [35] NA NA
## [37] NA NA
## [39] NA NA
## [41] NA NA
## [43] NA NA
## [45] NA NA
## [47] NA NA
## [49] NA NA
## [51] NA NA
## [53] NA NA
## [55] NA NA
## [57] NA NA
## [59] NA NA
## [61] NA NA
## [63] NA NA
## [65] NA NA
## [67] NA NA
## [69] NA NA
## [71] NA NA
## [73] NA NA
## [75] NA NA
## [77] NA NA
## [79] NA NA
## [81] NA NA
## [83] NA NA
## [85] NA NA
## [87] NA NA
## [89] NA NA
## [91] NA NA
## [93] NA NA
## [95] NA NA
## [97] NA NA
## [99] NA NA
## [101] NA NA
## [103] NA NA
## [105] NA NA
## [107] NA NA
## [109] NA NA
## [111] NA NA
## [113] NA NA
## [115] NA NA
## [117] NA NA
## [119] NA NA
## [121] NA NA
## [123] NA NA
## [125] NA NA
## [127] NA NA
## [129] NA NA
## [131] NA NA
## [133] NA NA
## [135] NA NA
## [137] NA NA
## [139] NA NA
## [141] NA NA
## [143] NA NA
## [145] NA NA
## [147] NA NA
## [149] NA NA
## [151] NA NA
## [153] NA NA
## [155] NA NA
## [157] NA NA
## [159] NA NA
## [161] NA NA
## [163] NA NA
## [165] NA NA
## [167] NA NA
ps.rel.sig <- prune_taxa(species2keep,ps.rel.sig)
otu_abun_select <- data.frame(otu_table(ps.rel.sig), check.names = F)
#import relavant metadata
metadata <- data.frame(sample_data(ps.rel.sig))
tax.clean <- data.frame(tax_table(ps.rel.sig))
# create a variable to define the subgroup
# order the matrix by the subgroup
# metadata$Treatment <- factor(metadata$Treatment, levels = groups)
# meta_order <- metadata[order(metadata$Treatment),]
# # re_order the col
# mat <- otu_abun_select
# mat <- as.matrix(mat[,rownames(meta_order)])
base_mean = rowMeans(mat)
mat_scaled = t(scale(t(mat)))
# calculate heatmap annotation
tax_heatmap <- tax.clean[colnames(mat_scaled),]
tax_heatmap$sign <- sapply(rownames(tax_heatmap), function(x) ifelse(x %in% rownames(sigtab),"*","ns"))
index <- match(rownames(tax_heatmap), rownames(tab))
index## [1] 1 2 3 4 NA 6 7 NA 9 10 11 12
tax_heatmap$p_val <- tab$padj[index]
tax_heatmap <- tax_heatmap[order(tax_heatmap$p_val),]
max(c(-log10(tax_heatmap$p_val)))## [1] NA
# my_palette <- c("darkblue", "darkgoldenrod1", "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen",
# "deeppink", "khaki2", "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue",
# "royalblue4", "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen",
# "darkorchid", "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey", "darkblue", "darkgoldenrod1",
# "darkseagreen", "darkorchid", "darkolivegreen1", "lightskyblue", "darkgreen", "deeppink", "khaki2",
# "firebrick", "brown1", "darkorange1", "cyan1", "royalblue4", "darksalmon", "darkblue", "royalblue4",
# "dodgerblue3", "steelblue1", "lightskyblue", "darkseagreen", "darkgoldenrod1", "darkseagreen", "darkorchid",
# "darkolivegreen1", "brown1", "darkorange1", "cyan1", "darkgrey")
my_palette <- rep(col_fil,5)
# adjust tax_heatmap genus and species, pasteurella
tax_heatmap$Genus <- as.character(tax_heatmap$Genus)
tax_heatmap$Species <- as.character(tax_heatmap$Species)
library(circlize)
tax_heatmap <- tax_heatmap[order(rownames(mat_scaled)),]
common_rows <- intersect(rownames(tax_heatmap), colnames(mat_scaled))
tax_heatmap <- tax_heatmap[common_rows, ]
mat_scaled <- t(mat_scaled[,common_rows])
#rownames(tax_heatmap)<- tax_heatmap$Species
rownames(mat_scaled) <-rownames(tax_heatmap)
## Order by stress group
# meta <- meta %>% data.frame %>% arrange(Stress_group)
# mat_scaled <- mat_scaled[,rownames(meta)]
plot <- mat_scaled
genus <- unique(as.character(tax_heatmap$Genus))
#genus_col <- colorRampPalette(my_palette)(length(genus))#
genus_col <- my_palette[1:length(genus)]
names(genus_col) <- genus
pvalue_col_fun = circlize::colorRamp2(c(1,0.1,0.05), c("red", "white", "lightseagreen"))
library(ComplexHeatmap)
ha_row <- HeatmapAnnotation(
#'Control vs Stress'=anno_simple(-log10(tax_heatmap$p_val),col = pvalue_col_fun, pch = na_if(tax_heatmap$sign,"ns"), gp = gpar(circlize::fontsize(1))),
Genus=anno_simple(tax_heatmap$Genus, col = genus_col),
which = "row")
ha_row_txt <- rowAnnotation(labels = anno_text(rownames(tax_heatmap), which = "row",gp=gpar(fontsize=10, face= "italic")))
ha_col = HeatmapAnnotation('Stress group'=meta$Stress_group,
col = list('Stress group'=c("Stress"=col_fil[2],
"Control"=col_fil[1]))
# , width = max_text_width(unlist(text_list))
)
Hist <- ComplexHeatmap::pheatmap(plot,
cluster_cols = TRUE, cluster_rows = TRUE,
name="Z-score", col=circlize::colorRamp2(c(-2, 0, 2), c("dodgerblue4", "white","deeppink3")),
top_annotation = ha_col,
left_annotation = ha_row,
right_annotation = ha_row_txt,
show_colnames = FALSE,
show_rownames = FALSE,
heatmap_legend_param = list(legend_direction = "horizontal")
)
Hist# define the two legend
lgd_genus = Legend(title = "Genus", legend_gp = gpar(fill = genus_col),labels = genus, ncol = 3)
lgd_sig = Legend(title= " ", pch = "*", type = "points", labels = "p < 0.05")
pvalue_col_fun = colorRamp2(c(1,0.1,0.05), c("red", "white", "lightseagreen"))
lgd_pvalue = Legend(title = "p value",
col_fun = pvalue_col_fun,
at = c(0, 1, 2),
labels = c("1","0.1","0.05"),
direction = "vertical")
p_Deseq_heatmap <-draw(Hist,
heatmap_legend_list=list(lgd_genus
#lgd_pvalue,
#lgd_sig
),
heatmap_legend_side = "bottom", annotation_legend_side = "bottom") ## [1] "Stress_group" "Sex_child" "Education_mother" "Timepoint"
Capscale examines the correlation of each varible independently. Adonis uses dbRDA to quantidy the effect of the most strongest correlating factor, removing it and then examining the remaining effect of the next factor and so forth…
## [1] "Infant_temp_NEG" "Infant_temp_REG" "Infant_temp_SUR" "Infant_weight_w2"
## [5] "Infant_weight_3m" "Timepoint"
## [1] "Infant_temp_NEG" "Infant_temp_REG" "Infant_temp_SUR" "Infant_weight_w2"
## [5] "Infant_weight_3m" "Timepoint"
## [1] "Hair cortisol" "HM_cortisol_AUC_1"
## [3] "HM_cortisol_AUC_2" "Saliva_cortisol_morning_peak"
## [5] "Timepoint"
## [1] "Hair cortisol" "HM_cortisol_AUC_1"
## [3] "HM_cortisol_AUC_2" "Saliva_cortisol_morning_peak"
## [5] "Timepoint"
## Accumulate the abundance along the tree branches...
## Compute pairwise distances ...
## Completed!
# create an trans_beta object
# measure parameter must be one of names(dataset$beta_diversity)
t1 <- trans_beta$new(dataset = meco_dat, group = variable, measure = "bray")
#PCoA, PCA and NMDS are available
t1$cal_ordination(ordination = "PCoA")
# t1$res_ordination is the ordination result list
#class(t1$res_ordination)
# plot the PCoA result with confidence ellipse
#t1$plot_ordination(plot_color = variable, plot_shape = variable, plot_type = c("point", "ellipse"))# calculate and plot sample distances within groups
t1$cal_group_distance(within_group = TRUE)
# return t1$res_group_distance
# perform Wilcoxon Rank Sum and Signed Rank Tests
t1$cal_group_distance_diff(method = "wilcox")
# plot_group_order parameter can be used to adjust orders in x axis
t1$plot_group_distance(boxplot_add = "mean")# for the whole comparison and for each paired groups
t1$cal_betadisper()
## The result is stored in object$res_betadisper ...
t1$res_betadisper##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.08297 0.082974 7.8283 999 0.007 **
## Residuals 178 1.88668 0.010599
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Control Stress
## Control 0.008
## Stress 0.0057101
variable = "Timepoint"
# create an trans_beta object
# measure parameter must be one of names(dataset$beta_diversity)
t1 <- trans_beta$new(dataset = meco_dat, group = variable, measure = "bray")
#PCoA, PCA and NMDS are available
t1$cal_ordination(ordination = "PCoA")
# t1$res_ordination is the ordination result list
#class(t1$res_ordination)
# plot the PCoA result with confidence ellipse
#t1$plot_ordination(plot_color = variable, plot_shape = variable, plot_type = c("point", "ellipse"))# calculate and plot sample distances within groups
t1$cal_group_distance(within_group = TRUE)
# return t1$res_group_distance
# perform Wilcoxon Rank Sum and Signed Rank Tests
t1$cal_group_distance_diff(method = "wilcox")
# plot_group_order parameter can be used to adjust orders in x axis
t1$plot_group_distance(boxplot_add = "mean")# for the whole comparison and for each paired groups
t1$cal_betadisper()
## The result is stored in object$res_betadisper ...
t1$res_betadisper##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.07593 0.075928 7.179 999 0.013 *
## Residuals 178 1.88262 0.010577
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## p10 p24
## p10 0.012
## p24 0.0080681
#env <- select(meco_dat$sample_table,all_of(effect.size.variables)) %>%
# mutate(Stress_group = as.numeric(as.character(Stress_group))) %>%
# select(-c("Batch","AB.infant")) %>%
# select(where(is.numeric)) %>%
# drop_na(Gestational.age) %>%
# drop_na() %>%
# select_if(~ !any(is.na(.)))
env <- meco_dat$sample_table %>%
dplyr::select(key.vars) %>%
dplyr::select(-Timepoint) %>%
drop_na() %>%
#tidyr::drop_na() %>% #Drop any rows with NAs
dplyr::mutate_if(is.character, as.factor)
#Fix environmental variable names
colnames(env) <- colnames(env) %>% gsub("_"," ",.)
str(env)## 'data.frame': 173 obs. of 6 variables:
## $ Stress group : Factor w/ 2 levels "Control","Stress": 1 1 1 1 1 1 1 1 1 1 ...
## $ Sex child : int 0 1 0 1 0 0 1 0 1 0 ...
## $ PSS stress score : int 6 8 17 13 16 18 16 10 7 8 ...
## $ LSCr stress score: int 12 5 15 0 18 5 3 17 6 7 ...
## $ Education mother : int 8 9 7 8 8 9 7 8 8 8 ...
## $ BMI mother : num 23.9 21.9 25.9 22.3 20.8 ...
## [1] "Stress group" "Sex child" "PSS stress score"
## [4] "LSCr stress score" "Education mother" "BMI mother"
Creating trans_env object
# use Genus
t1$cal_ordination(method = "RDA", taxa_level = "Genus")
# As the main results of RDA are related with the projection and angles between different arrows,
# we adjust the length of the arrow to show them clearly using several parameters.
t1$trans_ordination(show_taxa = 10, adjust_arrow_length = TRUE,
min_perc_env = 0.2,
max_perc_env = 0.5,
min_perc_tax = 1.5,
max_perc_tax = 2.5
)
# t1$res_rda_trans is the transformed result for plot
rda.group <- t1$plot_ordination(plot_color = "Stress_group",
plot_type = c("point"),
env_text_size = 4,
taxa_text_size = 4,
taxa_text_color="darkred",
taxa_arrow_color="darkred",
point_alpha=0.2,
taxa_nudge_x = c(0,0,0,-150,0,0,0,0,0,0),
taxa_nudge_y = c(0,100,300,0,-200,-100,-100,-200,2,0)) +
theme_classic() +
theme(plot.title = element_text(size = 12, face = "bold",hjust = 0.5),
strip.background = element_blank(),
strip.text.x = element_blank()
#legend.position = "none"
) +
labs(color="Stress group",fill="") +
scale_fill_discrete(guide = FALSE)
rda.groupThe correlations between environmental variables and taxa are important in analyzing and inferring the factors affecting community structure. Let’s first perform a correlation heatmap using relative abundance data at Genus level with the cal_cor function. The parameter p_adjust_type can control the p value adjustment type.
env <- meco_dat$sample_table %>% dplyr::select(key.vars.cortisol)
#env <- meco_dat$sample_table %>% dplyr::select(key.vars.inf)
#env <- meco_dat$sample_table %>% dplyr::select(key.vars)
t1 <- trans_env$new(dataset = meco_dat, add_data = env)# 'p_adjust_type = "Env"' means p adjustment is performed for each environmental variable separately.
t1$cal_cor(use_data = "Genus", p_adjust_method = "fdr", p_adjust_type = "Env")Then, we can plot the correlation results using plot_cor function.
There are too many genera. We can use the filter_feature parameter in plot_cor to filter some taxa that do not have any significance < 0.001.
# filter genera that donot have at least one ** or ***
t1$plot_cor(filter_feature = c(""),cluster_ggplot = "both")# 'p_adjust_type = "Env"' means p adjustment is performed for each environmental variable separately.
t1$cal_cor(use_data = "Genus", p_adjust_method = "fdr", p_adjust_type = "Env",by_group = "Day")There are too many genera. We can use the filter_feature parameter in plot_cor to filter some taxa that do not have any significance < 0.001.
# filter genera that donot have at least one ** or ***
t1$plot_cor(filter_feature = c(""),cluster_ggplot = "both",pheatmap = FALSE)pearson.cortisol.day <- t1$plot_cor(filter_feature = c(""),cluster_ggplot = "both",pheatmap = FALSE) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Sometimes, if the user wants to do the correlation analysis between the environmental factors and some important taxa detected in the biomarker analysis, please use other_taxa parameter in cal_cor function.
# first create trans_diff object as a demonstration
t2 <- trans_diff$new(dataset = meco_dat, method = "rf", group = "Stress_group", taxa_level = "Genus",p_adjust_method = "none")
# then create trans_env object
t1 <- trans_env$new(dataset = meco_dat, add_data = env)
# use other_taxa to select taxa you need
t1$cal_cor(use_data = "other", p_adjust_method = "fdr", other_taxa = t2$res_diff$Taxa)
t1$plot_cor()The pheatmap method is also available. Note that, besides the color_vector parameter, color_palette can also be used to control color palette with customized colors.
# clustering heatmap; require pheatmap package
# Let's take another color pallete
t1$plot_cor(pheatmap = TRUE, color_palette = rev(RColorBrewer::brewer.pal(n = 9, name = "RdYlBu")))## TableGrob (5 x 6) "layout": 6 grobs
## z cells name grob
## 1 1 (2-2,3-3) col_tree polyline[GRID.polyline.4202]
## 2 2 (4-4,1-1) row_tree polyline[GRID.polyline.4203]
## 3 3 (4-4,3-3) matrix gTree[GRID.gTree.4206]
## 4 4 (5-5,3-3) col_names text[GRID.text.4207]
## 5 5 (4-4,4-4) row_names text[GRID.text.4208]
## 6 6 (3-5,5-5) legend gTree[GRID.gTree.4211]
Sometimes, if it is needed to study the correlations between environmental variables and taxa for different groups, by_group parameter can be used for this goal.
# calculate correlations for different groups using parameter by_group
t1$cal_cor(by_group = "Timepoint", p_adjust_method = "fdr")
# return t1$res_cor
t1$plot_cor(filter_feature = c("", "*")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Ecological researchers are usually interested in the the funtional profiles of microbial communities, because functional or metabolic data is powerful to explain the structure and dynamics of microbial communities. As metagenomic sequencing is complicated and expensive, using amplicon sequencing data to predict functional profiles is an alternative choice. Several software are often used for this goal, such as PICRUSt (Langille et al. 2013), Tax4Fun (Aßhauer et al. 2015) and FAPROTAX (Stilianos Louca et al. 2016; S. Louca, Parfrey, and Doebeli 2016). These tools are great to be used for the prediction of functional profiles based on the prokaryotic communities from sequencing results. In addition, it is also important to obtain the traits or functions for each taxa, not just the whole profile of communities. FAPROTAX database is a collection of the traits and functions of prokaryotes based on the known research results published in books and literatures. We match the taxonomic information of prokaryotes against this database to predict the traits of prokaryotes on biogeochemical roles. The NJC19 database (Lim et al. 2020) is also available for animal-associated prokaryotic data, such as human gut microbiota. We also implement the FUNGuild (Nguyen et al. 2016) and FungalTraits (Põlme et al. 2020) databases to predict the fungal traits. The idea identifying prokaryotic traits and functional redundancy was initially inspired by our another study (Liu et al. 2022).
We first identify/predict traits of taxa with the prokaryotic example data.
# create object of trans_func
t2 <- trans_func$new(meco_dat)
# mapping the taxonomy to the database
# this can recognize prokaryotes or fungi automatically if the names of taxonomic levels are standard.
# for fungi example, see https://chiliubio.github.io/microeco_tutorial/other-dataset.html#fungi-data
# default database for prokaryotes is FAPROTAX database
t2$cal_spe_func(prok_database = "FAPROTAX")The percentages of the OTUs having the same trait can reflect the functional redundancy of this function in the community.
# calculate the percentages for communities
# here do not consider the abundance
t2$cal_spe_func_perc(abundance_weighted = FALSE)Then we also take an example to show the percentages of the OTUs for each trait in network modules.
# construct a network for the example
network <- trans_network$new(dataset = meco_dat, cal_cor = "base", taxa_level = "OTU", filter_thres = 0.0001, cor_method = "spearman")
network$cal_network(p_thres = 0.01, COR_cut = 0.7)
network$cal_module()
# convert module info to microtable object
meco_module <- network$trans_comm(use_col = "module")
meco_module_func <- trans_func$new(meco_module)
meco_module_func$cal_spe_func(prok_database = "FAPROTAX")
meco_module_func$cal_spe_func_perc(abundance_weighted = FALSE)
meco_module_func$plot_spe_func_perc(order_x = paste0("M", 1:10))# then we try to correlate the res_spe_func_perc of communities to environmental variables
t3 <- trans_env$new(dataset = meco_dat, add_data = env)
t3$cal_cor(add_abund_table = t2$res_spe_func_perc, cor_method = "spearman")
t3$plot_cor(pheatmap = TRUE
#filter_feature = c("")
)## TableGrob (5 x 6) "layout": 6 grobs
## z cells name grob
## 1 1 (2-2,3-3) col_tree polyline[GRID.polyline.4419]
## 2 2 (4-4,1-1) row_tree polyline[GRID.polyline.4420]
## 3 3 (4-4,3-3) matrix gTree[GRID.gTree.4423]
## 4 4 (5-5,3-3) col_names text[GRID.text.4424]
## 5 5 (4-4,4-4) row_names text[GRID.text.4425]
## 6 6 (3-5,5-5) legend gTree[GRID.gTree.4428]
Outputs saved, and figure manually arranged in Powerpoint
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Danish_Denmark.utf8 LC_CTYPE=Danish_Denmark.utf8
## [3] LC_MONETARY=Danish_Denmark.utf8 LC_NUMERIC=C
## [5] LC_TIME=Danish_Denmark.utf8
##
## attached base packages:
## [1] stats4 grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] cowplot_1.1.3 igraph_1.6.0
## [3] pheatmap_1.0.12 ComplexHeatmap_2.12.1
## [5] circlize_0.4.15 DESeq2_1.36.0
## [7] rabuplot_0.0.1.08 beemStatic_0.0.1
## [9] SpiecEasi_1.1.2 chorddiag_0.1.3
## [11] microViz_0.10.8 ampvis2_2.8.6
## [13] Maaslin2_1.10.0 ggtree_3.4.4
## [15] decontam_1.16.0 miaViz_1.4.0
## [17] ggraph_2.1.0 mia_1.4.0
## [19] MultiAssayExperiment_1.22.0 TreeSummarizedExperiment_2.4.0
## [21] Biostrings_2.64.1 XVector_0.36.0
## [23] SingleCellExperiment_1.18.1 SummarizedExperiment_1.26.1
## [25] Biobase_2.56.0 GenomicRanges_1.48.0
## [27] GenomeInfoDb_1.32.4 IRanges_2.30.1
## [29] S4Vectors_0.34.0 BiocGenerics_0.42.0
## [31] MatrixGenerics_1.8.1 matrixStats_1.2.0
## [33] ANCOMBC_1.6.4 phyloseq_1.40.0
## [35] here_1.0.1 GUniFrac_1.8
## [37] microeco_1.4.0 zscorer_0.3.1
## [39] VennDiagram_1.7.3 futile.logger_1.4.3
## [41] ggalluvial_0.12.5 UpSetR_1.4.0
## [43] RColorBrewer_1.1-3 ggsci_3.0.0
## [45] ggpubr_0.6.0 rstatix_0.7.2
## [47] vegan_2.6-4 lattice_0.22-5
## [49] permute_0.9-7 lubridate_1.9.3
## [51] forcats_1.0.0 stringr_1.5.1
## [53] dplyr_1.1.4 purrr_1.0.2
## [55] readr_2.1.5 tidyr_1.3.1
## [57] tibble_3.2.1 ggplot2_3.4.4
## [59] tidyverse_2.0.0 ggprism_1.0.4
## [61] BiocManager_1.30.22 devtools_2.4.5
## [63] usethis_2.2.2 patchwork_1.2.0
## [65] DT_0.31 ggtext_0.1.2
## [67] tidytext_0.4.1
##
## loaded via a namespace (and not attached):
## [1] metagMisc_0.0.4 graphlayouts_1.1.0
## [3] vctrs_0.6.5 expm_0.999-9
## [5] mgcv_1.9-1 gmp_0.7-4
## [7] rmutil_1.1.10 blob_1.2.4
## [9] survival_3.5-7 later_1.3.2
## [11] nloptr_2.0.3 DBI_1.2.1
## [13] zlibbioc_1.42.0 fBasics_4032.96
## [15] timeSeries_4032.109 GlobalOptions_0.1.2
## [17] htmlwidgets_1.6.4 mvtnorm_1.2-4
## [19] inline_0.3.19 parallel_4.2.1
## [21] scater_1.24.0 irlba_2.3.5.1
## [23] markdown_1.12 DEoptimR_1.1-3
## [25] tidygraph_1.3.0 Rcpp_1.0.12
## [27] KernSmooth_2.23-22 promises_1.2.1
## [29] DelayedArray_0.22.0 limma_3.52.4
## [31] pkgload_1.3.4 magick_2.8.2
## [33] Hmisc_5.1-1 fs_1.6.3
## [35] textshaping_0.3.7 png_0.1-8
## [37] digest_0.6.34 glmnet_4.1-8
## [39] janeaustenr_1.0.0 pkgconfig_2.0.3
## [41] ggnewscale_0.4.9 DelayedMatrixStats_1.18.2
## [43] ggbeeswarm_0.7.2 estimability_1.4.1
## [45] iterators_1.0.14 minqa_1.2.6
## [47] biglm_0.9-2.1 beeswarm_0.4.0
## [49] GetoptLong_1.0.5 selectiveInference_1.2.5
## [51] xfun_0.41 bslib_0.6.1
## [53] zoo_1.8-12 tidyselect_1.2.0
## [55] reshape2_1.4.4 pcaPP_2.0-4
## [57] viridisLite_0.4.2 pkgbuild_1.4.3
## [59] rlang_1.1.3 jquerylib_0.1.4
## [61] Rmpfr_0.9-5 glue_1.7.0
## [63] lambda.r_1.2.4 emmeans_1.10.0
## [65] ggsignif_0.6.4 labeling_0.4.3
## [67] httpuv_1.6.14 biomformat_1.24.0
## [69] class_7.3-22 BiocNeighbors_1.14.0
## [71] TH.data_1.1-2 tokenizers_0.3.0
## [73] Wrench_1.14.0 annotate_1.74.0
## [75] jsonlite_1.8.8 systemfonts_1.0.5
## [77] bit_4.0.5 mime_0.12
## [79] gridExtra_2.3 gplots_3.1.3
## [81] Exact_3.2 stringi_1.8.3
## [83] gsl_2.1-8 rbibutils_2.2.16
## [85] yulab.utils_0.1.4 bitops_1.0-7
## [87] cli_3.6.2 Rdpack_2.6
## [89] rhdf5filters_1.8.0 RSQLite_2.3.5
## [91] randomForest_4.7-1.1 spatial_7.3-17
## [93] data.table_1.14.10 energy_1.7-11
## [95] timechange_0.3.0 rstudioapi_0.15.0
## [97] microbiome_1.18.0 CVXR_1.0-11
## [99] nlme_3.1-164 locfit_1.5-9.8
## [101] DECIPHER_2.24.0 SnowballC_0.7.1
## [103] miniUI_0.1.1.1 gridGraphics_0.5-1
## [105] urlchecker_1.0.1 stable_1.1.6
## [107] optparse_1.7.4 sessioninfo_1.2.2
## [109] readxl_1.4.3 lifecycle_1.0.4
## [111] timeDate_4032.109 commonmark_1.9.1
## [113] munsell_0.5.0 cellranger_1.1.0
## [115] statip_0.2.3 caTools_1.18.2
## [117] codetools_0.2-19 coda_0.19-4
## [119] vipor_0.4.7 htmlTable_2.4.2
## [121] xtable_1.8-4 formatR_1.14
## [123] lpsymphony_1.24.0 abind_1.4-5
## [125] farver_2.1.1 pulsar_0.3.11
## [127] aplot_0.2.2 futile.options_1.0.1
## [129] profvis_0.3.8 cluster_2.1.6
## [131] Matrix_1.6-5 tidytree_0.4.6
## [133] ellipsis_0.3.2 metagenomeSeq_1.38.0
## [135] adaptMCMC_1.5 multtest_2.52.0
## [137] remotes_2.4.2.1 lmerTest_3.1-3
## [139] getopt_1.20.4 htmltools_0.5.7
## [141] yaml_2.3.8 utf8_1.2.4
## [143] plotly_4.10.4 XML_3.99-0.16.1
## [145] e1071_1.7-14 foreign_0.8-86
## [147] withr_3.0.0 scuttle_1.6.3
## [149] BiocParallel_1.30.4 bit64_4.0.5
## [151] rngtools_1.5.2 doRNG_1.8.6
## [153] rootSolve_1.8.2.4 multcomp_1.4-25
## [155] foreach_1.5.2 robustbase_0.99-2
## [157] ragg_1.2.7 rsvd_1.0.5
## [159] ScaledMatrix_1.4.1 memoise_2.0.1
## [161] evaluate_0.23 VGAM_1.1-9
## [163] geneplotter_1.74.0 tzdb_0.4.0
## [165] lmom_3.0 fansi_1.0.6
## [167] highr_0.10 checkmate_2.3.1
## [169] cachem_1.0.8 rjson_0.2.21
## [171] ggrepel_0.9.5 ade4_1.7-22
## [173] clue_0.3-65 rprojroot_2.0.4
## [175] tools_4.2.1 stabledist_0.7-1
## [177] sass_0.4.8 sandwich_3.1-0
## [179] magrittr_2.0.3 RCurl_1.98-1.14
## [181] proxy_0.4-27 car_3.1-2
## [183] ape_5.7-1 ggplotify_0.1.2
## [185] xml2_1.3.6 httr_1.4.7
## [187] rmarkdown_2.25 boot_1.3-28.1
## [189] R6_2.5.1 Rhdf5lib_1.18.2
## [191] nnet_7.3-19 KEGGREST_1.36.3
## [193] DirichletMultinomial_1.38.0 genefilter_1.78.0
## [195] treeio_1.20.2 gtools_3.9.5
## [197] shape_1.4.6 statmod_1.5.0
## [199] beachmat_2.12.0 BiocSingular_1.12.0
## [201] rhdf5_2.40.0 splines_4.2.1
## [203] carData_3.0-5 ggfun_0.1.4
## [205] colorspace_2.1-0 generics_0.1.3
## [207] base64enc_0.1-3 gridtext_0.1.5
## [209] pillar_1.9.0 tweenr_2.0.2
## [211] GenomeInfoDbData_1.2.8 plyr_1.8.9
## [213] gtable_0.3.4 knitr_1.45
## [215] fastmap_1.1.1 Cairo_1.6-2
## [217] modeest_2.4.0 doParallel_1.0.17
## [219] AnnotationDbi_1.58.0 broom_1.0.5
## [221] scales_1.3.0 huge_1.3.5
## [223] backports_1.4.1 EnhancedVolcano_1.14.0
## [225] file2meco_0.7.0 lme4_1.1-35.1
## [227] gld_2.6.6 hms_1.1.3
## [229] ggforce_0.4.1 Rtsne_0.17
## [231] shiny_1.8.0 polyclip_1.10-6
## [233] numDeriv_2016.8-1.1 DescTools_0.99.53
## [235] lazyeval_0.2.2 Formula_1.2-5
## [237] crayon_1.5.2 MASS_7.3-60.0.1
## [239] sparseMatrixStats_1.8.0 viridis_0.6.5
## [241] rpart_4.1.23 compiler_4.2.1
## [243] intervals_0.15.4